import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import plotly.graph_objects as go
import plotly.io as pio
# Load the cleaned COVID-19 dataset and preview its first five rows.
c = pd.read_csv(filepath_or_buffer='covid_19_clean_complete.csv')
c.head(n=5)
| Province/State | Country/Region | Lat | Long | Date | Confirmed | Deaths | Recovered | Active | WHO Region | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | Afghanistan | 33.93911 | 67.709953 | 2020-01-22 | 0 | 0 | 0 | 0 | Eastern Mediterranean |
| 1 | NaN | Albania | 41.15330 | 20.168300 | 2020-01-22 | 0 | 0 | 0 | 0 | Europe |
| 2 | NaN | Algeria | 28.03390 | 1.659600 | 2020-01-22 | 0 | 0 | 0 | 0 | Africa |
| 3 | NaN | Andorra | 42.50630 | 1.521800 | 2020-01-22 | 0 | 0 | 0 | 0 | Europe |
| 4 | NaN | Angola | -11.20270 | 17.873900 | 2020-01-22 | 0 | 0 | 0 | 0 | Africa |
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
c.describe()
| Lat | Long | Confirmed | Deaths | Recovered | Active | |
|---|---|---|---|---|---|---|
| count | 49068.000000 | 49068.000000 | 4.906800e+04 | 49068.000000 | 4.906800e+04 | 4.906800e+04 |
| mean | 21.433730 | 23.528236 | 1.688490e+04 | 884.179160 | 7.915713e+03 | 8.085012e+03 |
| std | 24.950320 | 70.442740 | 1.273002e+05 | 6313.584411 | 5.480092e+04 | 7.625890e+04 |
| min | -51.796300 | -135.000000 | 0.000000e+00 | 0.000000 | 0.000000e+00 | -1.400000e+01 |
| 25% | 7.873054 | -15.310100 | 4.000000e+00 | 0.000000 | 0.000000e+00 | 0.000000e+00 |
| 50% | 23.634500 | 21.745300 | 1.680000e+02 | 2.000000 | 2.900000e+01 | 2.600000e+01 |
| 75% | 41.204380 | 80.771797 | 1.518250e+03 | 30.000000 | 6.660000e+02 | 6.060000e+02 |
| max | 71.706900 | 178.065000 | 4.290259e+06 | 148011.000000 | 1.846641e+06 | 2.816444e+06 |
# Print each column's dtype and non-null count, plus the frame's memory usage.
c.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 49068 entries, 0 to 49067 Data columns (total 10 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Province/State 14664 non-null object 1 Country/Region 49068 non-null object 2 Lat 49068 non-null float64 3 Long 49068 non-null float64 4 Date 49068 non-null object 5 Confirmed 49068 non-null int64 6 Deaths 49068 non-null int64 7 Recovered 49068 non-null int64 8 Active 49068 non-null int64 9 WHO Region 49068 non-null object dtypes: float64(2), int64(4), object(4) memory usage: 3.7+ MB
# (rows, columns) of the loaded frame.
c.shape
(49068, 10)
# Count the missing values in every column.
c.isna().sum()
Province/State 34404 Country/Region 0 Lat 0 Long 0 Date 0 Confirmed 0 Deaths 0 Recovered 0 Active 0 WHO Region 0 dtype: int64
# Remove the Province/State column in place; the rest of the analysis works
# on country and WHO-region level only.
c.drop(columns=['Province/State'], inplace=True)

# Convert the Date column to datetime values so it can be compared,
# used as a time index and turned into periods later on.
c['Date'] = pd.to_datetime(c['Date'])
# Summed Deaths per WHO region, largest first, as a flat two-column frame
# ('WHO Region', 'Deaths') with a fresh 0..n-1 index.
#
# Only the numeric 'Deaths' column is selected: also selecting the grouping
# key as a value column relied on pandas silently dropping non-numeric
# columns, which triggers a FutureWarning and breaks on newer pandas.
# NOTE(review): Deaths looks like a cumulative daily series, so summing it
# over every date overstates the totals - consider using only the latest date.
who = (
    c.groupby('WHO Region', as_index=False)['Deaths']
    .sum()
    .sort_values(by='Deaths', ascending=False)
    .head(10)
    .reset_index(drop=True)
)
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\1601602620.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
who =c.groupby('WHO Region')[['WHO Region','Deaths']].sum().sort_values(by=['Deaths'],ascending=False).head(10)
# Show the column labels of the per-region deaths frame.
who.columns
Index(['WHO Region', 'Deaths'], dtype='object')
# Horizontal bar chart of the summed deaths, one colored bar per WHO region.
fig = px.bar(
    data_frame=who,
    x='Deaths',
    y='WHO Region',
    color='WHO Region',
    title='Records of Deaths in the WHO Region',
)
fig.show()
# Distinct WHO region labels present in the data, as a NumPy array.
who_is_who = pd.unique(c['WHO Region'])
who_is_who
array(['Eastern Mediterranean', 'Europe', 'Africa', 'Americas',
'Western Pacific', 'South-East Asia'], dtype=object)
# Summed Recovered per WHO region, largest first, as a flat two-column frame
# ('WHO Region', 'Recovered') with a fresh 0..n-1 index.
#
# Only the numeric 'Recovered' column is aggregated: selecting the grouping
# key as a value column relied on pandas silently dropping non-numeric
# columns, which triggers a FutureWarning and breaks on newer pandas.
recovered = (
    c.groupby('WHO Region', as_index=False)['Recovered']
    .sum()
    .sort_values(by='Recovered', ascending=False)
    .reset_index(drop=True)
)
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\756317827.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
# Donut chart (pie with a 0.4 hole) of the summed recovered counts per WHO region.
fig = px.pie(
    data_frame=recovered,
    values='Recovered',
    names='WHO Region',
    title='Total Recovered in the WHO Region',
    hole=0.4,
)
fig.show()
# Summed Active / Recovered / Deaths per WHO region, with 'WHO Region' kept
# as an ordinary column so the bar charts can use it as the x axis.
#
# as_index=False replaces the separate reset_index call, and the grouping key
# is no longer selected as a value column (that relied on pandas dropping
# non-numeric columns, which is deprecated).
group_who = c.groupby('WHO Region', as_index=False)[
    ['Active', 'Recovered', 'Deaths']
].sum()
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\1700114451.py:1: FutureWarning: The default value of numeric_only in DataFrameGroupBy.sum is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
# Grouped bar chart: for every WHO region, side-by-side bars for the
# recovered, death and active totals.
anchos = [0.2] * 6  # bar width for each of the six regions

fig = go.Figure()
# (column in group_who, legend label), in the order the traces are added.
for column, label in (
    ('Recovered', 'Recovered Cases'),
    ('Deaths', 'Death Cases'),
    ('Active', 'Active Cases'),
):
    fig.add_trace(
        go.Bar(
            x=group_who['WHO Region'],
            y=group_who[column],
            width=anchos,
            name=label,
        )
    )

fig.update_layout(
    title="WHO Regions Stats",
    barmode='group',
    title_font_size=40,
)
fig.update_xaxes(title_text='WHO Regions')
fig.update_yaxes(title_text="Number of cases")
fig.show()
# Stacked bar chart: one bar per WHO region, with the recovered, active and
# death totals stacked on top of each other.
fig = go.Figure()
# (column in group_who, legend label), in stacking order.
for column, label in (
    ('Recovered', 'Recovered Cases'),
    ('Active', 'Active Cases'),
    ('Deaths', 'Death Cases'),
):
    fig.add_trace(
        go.Bar(
            x=group_who['WHO Region'],
            y=group_who[column],
            name=label,
        )
    )

fig.update_layout(
    title="WHO Regions Stats",
    barmode='stack',
    title_font_size=40,
)
fig.update_xaxes(title_text='WHO Regions')
# Kept as the final expression: it returns the figure, which a notebook displays.
fig.update_yaxes(title_text="Number of cases")
# Pairwise correlation of the numeric columns, rendered as a heat-mapped
# table with two decimals.
#
# - numeric_only=True is passed explicitly; relying on the default to drop the
#   text and date columns is deprecated.
# - Styler.format(precision=2) replaces the deprecated Styler.set_precision(2).
# - The unused random demo frame that used to be built here was removed.
corr = c.corr(numeric_only=True)
corr.style.background_gradient(cmap='coolwarm').format(precision=2)
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\308013264.py:3: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. C:\Users\karth\AppData\Local\Temp\ipykernel_22732\308013264.py:4: FutureWarning: this method is deprecated in favour of `Styler.format(precision=..)`
| Lat | Long | Confirmed | Deaths | Recovered | Active | |
|---|---|---|---|---|---|---|
| Lat | 1.00 | -0.13 | 0.04 | 0.07 | 0.02 | 0.04 |
| Long | -0.13 | 1.00 | -0.08 | -0.10 | -0.05 | -0.09 |
| Confirmed | 0.04 | -0.08 | 1.00 | 0.91 | 0.90 | 0.95 |
| Deaths | 0.07 | -0.10 | 0.91 | 1.00 | 0.76 | 0.89 |
| Recovered | 0.02 | -0.05 | 0.90 | 0.76 | 1.00 | 0.71 |
| Active | 0.04 | -0.09 | 0.95 | 0.89 | 0.71 | 1.00 |
# Time series for Algeria only: Recovered / Deaths / Active indexed by Date.
algeria_rows = c['Country/Region'] == 'Algeria'
alg = c.loc[algeria_rows, ['Date', 'Recovered', 'Deaths', 'Active']].set_index('Date')
alg
| Recovered | Deaths | Active | |
|---|---|---|---|
| Date | |||
| 2020-01-22 | 0 | 0 | 0 |
| 2020-01-23 | 0 | 0 | 0 |
| 2020-01-24 | 0 | 0 | 0 |
| 2020-01-25 | 0 | 0 | 0 |
| 2020-01-26 | 0 | 0 | 0 |
| ... | ... | ... | ... |
| 2020-07-23 | 17369 | 1124 | 6991 |
| 2020-07-24 | 17369 | 1136 | 7654 |
| 2020-07-25 | 18076 | 1146 | 7542 |
| 2020-07-26 | 18088 | 1155 | 8114 |
| 2020-07-27 | 18837 | 1163 | 7973 |
188 rows × 3 columns
# Build a month-end resampler over the Algeria series.
# NOTE(review): no aggregation (.sum(), .last(), ...) is applied and the result
# is not assigned, so this only displays the resampler object and leaves `alg`
# unchanged - confirm which monthly aggregation was intended.
alg.resample('M')
<pandas.core.resample.DatetimeIndexResampler object at 0x000002B08E8EE250>
# Use a 15x8 figure for the seaborn plots from here on.
figure_settings = {'figure.figsize': (15, 8)}
sns.set(rc=figure_settings)

# One line per column of `alg` (Recovered, Deaths, Active) against its Date index.
sns.lineplot(data=alg)
<Axes: xlabel='Date'>
# Snapshot of the most recent date in the data, summed per country so that
# countries appearing in several rows get a single total.
latest = c[c['Date'] == c['Date'].max()]

# The value columns are selected with a list: indexing a groupby with several
# bare keys (an implicit tuple) is deprecated in pandas.
latest_map = (
    latest.groupby('Country/Region')[['Active', 'Confirmed', 'Deaths', 'Recovered']]
    .sum()
    .reset_index()
)
latest_map.head()
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\4070237916.py:2: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
| Country/Region | Active | Confirmed | Deaths | Recovered | |
|---|---|---|---|---|---|
| 0 | Afghanistan | 9796 | 36263 | 1269 | 25198 |
| 1 | Albania | 1991 | 4880 | 144 | 2745 |
| 2 | Algeria | 7973 | 27973 | 1163 | 18837 |
| 3 | Andorra | 52 | 907 | 52 | 803 |
| 4 | Angola | 667 | 950 | 41 | 242 |
# World map colored by each country's death count on the latest date;
# the color scale is clamped to the range 1..10000.
fig = px.choropleth(
    data_frame=latest_map,
    locations='Country/Region',
    locationmode='country names',
    color='Deaths',
    range_color=[1, 10000],
)
fig.update_layout(title="Death Records Across The World")
fig.show()
# World map of confirmed cases as of the most recent date in the data.
#
# The map previously received the full time series, so every country appeared
# once per date (and once per province row) and the province rows were never
# added up. Reduce the data to one summed row per country for the latest date
# before plotting.
confirmed_latest = (
    c[c['Date'] == c['Date'].max()]
    .groupby('Country/Region', as_index=False)['Confirmed']
    .sum()
)
fig = px.choropleth(
    confirmed_latest,
    locations='Country/Region',
    locationmode='country names',
    color='Confirmed',
)
fig.update_layout(
    title='Choropleth Map of Confirmed Cases -till today',
    template="plotly_dark",
)
fig.show()
# World-wide daily totals: Active / Confirmed / Deaths summed over all rows
# of each date, with Date kept as an ordinary column.
#
# The value columns are selected with a list: indexing a groupby with several
# bare keys (an implicit tuple) is deprecated in pandas.
world_cases = (
    c.groupby('Date')[['Active', 'Confirmed', 'Deaths']]
    .sum()
    .reset_index()
)
world_cases
C:\Users\karth\AppData\Local\Temp\ipykernel_22732\944271916.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.
| Date | Active | Confirmed | Deaths | |
|---|---|---|---|---|
| 0 | 2020-01-22 | 510 | 555 | 17 |
| 1 | 2020-01-23 | 606 | 654 | 18 |
| 2 | 2020-01-24 | 879 | 941 | 26 |
| 3 | 2020-01-25 | 1353 | 1434 | 42 |
| 4 | 2020-01-26 | 2010 | 2118 | 56 |
| ... | ... | ... | ... | ... |
| 183 | 2020-07-23 | 6166006 | 15510481 | 633506 |
| 184 | 2020-07-24 | 6212290 | 15791645 | 639650 |
| 185 | 2020-07-25 | 6243930 | 16047190 | 644517 |
| 186 | 2020-07-26 | 6309711 | 16251796 | 648621 |
| 187 | 2020-07-27 | 6358362 | 16480485 | 654036 |
188 rows × 4 columns
# World-wide active cases over time.
sns.lineplot(data=world_cases, x='Date', y='Active')
<Axes: xlabel='Date', ylabel='Active'>
# Print the first row (in file order) that reports at least one death:
# its death count, country and date. Nothing is printed if no row qualifies.
#
# A boolean mask plus idxmax() finds the first matching row label without a
# Python-level loop over every row.
has_deaths = c['Deaths'] > 0
if has_deaths.any():
    first_idx = has_deaths.idxmax()  # label of the first True entry
    print(
        c.at[first_idx, 'Deaths'],
        c.at[first_idx, 'Country/Region'],
        c.at[first_idx, 'Date'],
    )
17 China 2020-01-22 00:00:00
# Add a lowercase 'date' column holding the calendar month (monthly Period)
# of each row's Date; the animated scatter uses it as its frame key.
as_datetime = pd.to_datetime(c['Date'])
c['date'] = as_datetime.dt.to_period('M')
c.head(n=3)
| Country/Region | Lat | Long | Date | Confirmed | Deaths | Recovered | Active | WHO Region | date | |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | 33.93911 | 67.709953 | 2020-01-22 | 0 | 0 | 0 | 0 | Eastern Mediterranean | 2020-01 |
| 1 | Albania | 41.15330 | 20.168300 | 2020-01-22 | 0 | 0 | 0 | 0 | Europe | 2020-01 |
| 2 | Algeria | 28.03390 | 1.659600 | 2020-01-22 | 0 | 0 | 0 | 0 | Africa | 2020-01 |
# Animated scatter of active cases (log x axis) against deaths, colored by
# WHO region, with one animation frame per month.
#
# The y axis is pinned so it stays comparable across frames, and its upper
# bound is taken from the data. The earlier fixed range of 25..95 cut off
# nearly every point, since death counts reach far beyond 95.
px.scatter(
    data_frame=c,
    x='Active',
    y='Deaths',
    color='WHO Region',
    title='The relation between the Active and death cases',
    log_x=True,
    range_y=[0, int(c['Deaths'].max())],
    animation_frame='date',
)
# Make the dark theme the default template for figures created from here on.
pio.templates.default = "plotly_dark"

# 3-D scatter of every row: confirmed vs. deaths vs. recovered.
px.scatter_3d(
    data_frame=c,
    x="Confirmed",
    y="Deaths",
    z="Recovered",
)
# World-wide recovered cases over time.
#
# The raw frame holds many rows per date (one per country/province row), so
# drawing a single line through it zig-zags between unrelated rows. Sum the
# rows of each date first so the line shows the world total. The typo in the
# title ("Wolrd") is fixed as well.
recovered_by_date = c.groupby('Date', as_index=False)['Recovered'].sum()
px.line(
    recovered_by_date,
    x="Date",
    y="Recovered",
    title="World Wide Recovered",
)